/*
Second part of the sample restrictions, followed by a 50% random sample of individuals.
*/

// ---- Session setup ----
// Start from an empty session, switch off output paging, and close any log that is
// still open (cap = capture, so the script continues when no log is open).
clear all
set more off
cap log close
// NOTE(review): the log path looks like a placeholder - set it before running.
log  using  .../A_2.smcl, replace

// Run the commands below under Stata 13 version control.
version 13

// global paths 
// NOTE(review): both values look like placeholders. They are concatenated directly
// with file names below, so each value has to end with a directory separator.
global datainpath "..."
global dataoutpath "..."

// Load the input dataset; clear discards whatever is currently in memory.
use "${datainpath}all_survival1.dta", clear
// Basic amounts, one per year 1993-2011.
// Source: https://www.nav.no/no/NAV+og+samfunn/Kontakt+NAV/Utbetalinger/Grunnbelopet+i+folketrygden
scalar ba1993 = 37033
scalar ba1994 = 37820
scalar ba1995 = 38847
scalar ba1996 = 40410
scalar ba1997 = 42000
scalar ba1998 = 44413
scalar ba1999 = 46423
scalar ba2000 = 48377
scalar ba2001 = 50603
scalar ba2002 = 53233
scalar ba2003 = 55964
scalar ba2004 = 58139
scalar ba2005 = 60059
scalar ba2006 = 62161
scalar ba2007 = 65505
scalar ba2008 = 69108
scalar ba2009 = 72006
scalar ba2010 = 74721
scalar ba2011 = 78024

// ba = basic amount of the observation's year; it stays 0 for years outside 1993-2011.
// scalar() forces Stata to read the scalar: a bare name in an expression is resolved
// as a variable (or variable abbreviation) first if one happens to match.
generate ba = 0
forvalues yr = 1993/2011 {
	replace ba = scalar(ba`yr') if year == `yr'
}
label variable ba "Basic Amount"
// attached = 1 in years where abs(wage + abs(net_naering)) exceeds that year's basic amount.
// ba is already year specific, so a single statement replaces the year-by-year loop;
// inrange() keeps the same 1993-2011 restriction.
// Stata treats missing as larger than any number, so missing income is excluded
// explicitly; otherwise a year with missing wage or net_naering would count as attached.
generate attached = 0
replace attached = 1 if abs(wage + abs(net_naering)) > ba ///
	& !missing(wage, net_naering) & inrange(year, 1993, 2011)
label variable attached "employment income > Basic Amount"

// Number of attached years per individual (total() is the documented name of egen's sum()).
bysort id: egen sum_attached = total(attached)
label variable sum_attached "# years employment income > Basic Amount, time invariant"

* Baseline restriction: drop individuals attached to employment for fewer than 3 years.
drop if sum_attached < 3
* Education level and field, read off the digits of bu_nus2000.
// NOTE(review): string() does not keep leading zeros, so a code whose first digit is 0
// would be read one position off - confirm that this cannot occur in bu_nus2000.
generate bu_nus2000_string = string(bu_nus2000)
generate lev_educ = substr(bu_nus2000_string, 1, 1)             // 1st digit: level
generate field_educ = substr(bu_nus2000_string, 2, 1)           // 2nd digit: broad field
generate subject_bu_detailed = substr(bu_nus2000_string, 2, 2)  // 2nd and 3rd digit: detailed field
destring lev_educ field_educ subject_bu_detailed, replace

// Broad field on a 0, 10, ..., 90 scale, so that it can share one coding with the detailed codes.
replace field_educ = field_educ * 10

// For selected level / detailed-field combinations the two-digit detailed code is kept instead.
replace field_educ = subject_bu_detailed if lev_educ == 3 & inlist(subject_bu_detailed, 16, 41, 42, 43, 55, 57)
replace field_educ = subject_bu_detailed if lev_educ == 4 & inlist(subject_bu_detailed, 41, 55, 57, 83)
replace field_educ = subject_bu_detailed if lev_educ == 5 & inlist(subject_bu_detailed, 43, 55, 57)
replace field_educ = subject_bu_detailed if lev_educ == 6 & inlist(subject_bu_detailed, 25, 41, 55, 61, 65)
replace field_educ = subject_bu_detailed if lev_educ == 7 & inlist(subject_bu_detailed, 36, 37, 57, 63, 64, 67)
replace field_educ = subject_bu_detailed if lev_educ == 8 & inlist(subject_bu_detailed, 31, 63, 64, 67)

// Level 9 is recoded to missing before the per-individual maximum is taken.
replace lev_educ = . if lev_educ == 9
bysort id: egen max_lev_edu = max(lev_educ)

* Variable and value labels for the maximum level and the field of education
label variable max_lev_edu "max level education, time invariant"
label define educatlevl ///
	0 "no education" ///
	1 "primary school" ///
	2 "lower secondary school" ///
	3 "High school, primary education" ///
	4 "High School, final year" ///
	5 " Extension on secondary education" ///
	6 "tertiary education , lower level" ///
	7 "university and college education, higher level" ///
	8 "Researcher" ///
	9 "Not specified education", modify
label values max_lev_edu educatlevl

label variable field_educ "field education, 2nd digit educ_"
// Multiples of 10 are the broad fields; the other codes are detailed fields.
label define subj ///
	0 "General subjects" ///
	10 "Hum&arts" ///
	16 "Visual art and crafts" ///
	20 "pedagogy" ///
	25 "Supplementary education for teachers" ///
	30 "social s./law" ///
	31 "Political science" ///
	36 "psychologist" ///
	37 "Law" ///
	40 "buss&adm" ///
	41 "Business administration" ///
	42 "Wholesale and retail sales" ///
	43 "Secretarial and office skills" ///
	50 "nat.scien/tecnhi" ///
	55 "Electrical, electronic, mechanical" ///
	57 "Building and construction(architecture for higher)" ///
	60 "health/sport" ///
	61 "Nursing and caring" ///
	63 "Medicine" ///
	64 "Dental health" ///
	65 "Therapy" ///
	67 "Veterinary medicine" ///
	70 "primary indu." ///
	80 "transp/commun/other services" ///
	83 "Hair and beauty care" ///
	90 "unknown", modify
label values field_educ subj
* For some individuals the maximum level is missing, because lev_educ is always missing or always 9 (unknown); they are dropped.
drop if max_lev_edu == .

// Three-group version of the maximum level.
generate group_max_lev_edu = 0
replace group_max_lev_edu = 1 if max_lev_edu <= 3                    // highschool drop-out and less
replace group_max_lev_edu = 2 if max_lev_edu > 3 & max_lev_edu <= 5  // high-school and post-secondary non-tertiary education
replace group_max_lev_edu = 3 if max_lev_edu >= 6 & max_lev_edu < 9  // tertiary education, undergraduate, graduate and post-graduate level
label variable group_max_lev_edu "maximum level education, grouped, time invariant"

// The grouped categories get their own value label, attached to the grouped variable.
// Defining them under educatlevl would overwrite codes 1-3 of the label that describes
// the ungrouped max_lev_edu, and would leave group_max_lev_edu without value labels.
label define grp_educatlevl 1 "highschool drop-out and less" ///
	2 "high-school graduate and Post-secondary education" ///
	3 "tertiary education,  undergraduate, graduate and post-g. level", modify
label values group_max_lev_edu grp_educatlevl
// 0/1 indicators for ten groups of field_educ codes (every code belongs to exactly one group).
generate field1 = inlist(field_educ, 0, 90)
generate field2 = inlist(field_educ, 10, 16)
generate field3 = inlist(field_educ, 20, 25)
generate field4 = inlist(field_educ, 30, 31, 37)
generate field5 = inlist(field_educ, 40, 41, 42, 43)
generate field6 = inlist(field_educ, 50, 55, 57)
generate field7 = inlist(field_educ, 60, 61)
generate field8 = inlist(field_educ, 36, 63, 64, 65, 67)
generate field9 = (field_educ == 70)
generate field10 = inlist(field_educ, 80, 83)

// Categorical version: the number of the indicator that is set, 0 when none is set.
generate field = 0
forvalues k = 1/10 {
	replace field = field + `k' * field`k'
}

// Coarser seven-group version: groups 1, 3, 4 and 8 are pooled into the first one.
generate fieldsec1 = field1 | field3 | field4 | field8
generate fieldsec2 = field2
generate fieldsec3 = field5
generate fieldsec4 = field6
generate fieldsec5 = field7
generate fieldsec6 = field9
generate fieldsec7 = field10
generate fieldsec = 0
forvalues k = 1/7 {
	replace fieldsec = fieldsec + `k' * fieldsec`k'
}
// Three-group version of the level completed in each year; missing when lev_educ is missing.
//   1 = level 3 or lower, 2 = levels 4-5, 3 = levels 6-8
generate group_lev_edu = cond(lev_educ <= 3, 1, ///
	cond(lev_educ <= 5, 2, ///
	cond(lev_educ >= 6 & lev_educ < 9, 3, .)))

// One 0/1 indicator per group: educ_1, educ_2, educ_3.
tabulate group_lev_edu, generate(educ_)

// Field within education group: the detailed field for group 3, the coarse field for groups 1 and 2.
generate field_educ3 = educ_3 * field
generate field_educ2 = educ_2 * fieldsec
generate field_educ1 = educ_1 * fieldsec

// Indicator sets educ_1_*, educ_2_* and educ_3_*, one indicator per observed value.
forvalues g = 1/3 {
	tabulate field_educ`g', generate(educ_`g'_)
}
// Drop every individual whose lev_educ is missing in at least one year.
bysort id: egen any_missing_lev = max(missing(lev_educ))
drop if any_missing_lev
drop any_missing_lev
// Convert to numeric every string variable that contains only numbers.
destring, replace

// Married includes registered partners (marriage code 6).
generate married = inlist(marriage, 2, 6)
label variable married "married - time varying"
generate female = (male == 2)
label variable female "indicator for female"

// The letter code A is blanked out so that the variable can be converted to numeric.
replace kode_ong = "" if kode_ong == "A"
destring kode_ong, replace
// ---- Household and demographics ----
label var famtype        "family type"
label var municipality   "municipality of residence"
label var marriage       "Marital status"
label define marr ///
	1 "unmarried" ///
	2 "married" ///
	3 "widow / widower" ///
	4 "divorced" ///
	5 "separated" ///
	6 "registered partner" ///
	7 "separated partner" ///
	8 "divorced partner" ///
	9 "surviving spouse", modify
label values marriage marr
// NOTE(review): educ has to exist under exactly this name; educ_1, educ_2, ... created
// earlier in this script make it ambiguous as an abbreviation.
label var educ           "educational level"

// ---- Taxes ----
label var stax_b         "Surtax Basis"
label var tot_tax        "total Income and wealth tax"
label var tax_neg_transf "Total assessed taxes+neg transfers"
label var neg_transf     "Negative transfers"

// ---- Income components ----
label var gross_int_inc  "Gross interest income"
label var ord_income     "sum of taxable income (wage+bus+pens+cap)-deduction"
label var dividends      "dividends, including foreign companies"
label var capital_gains  "realized capital gains"
label var empl_income    "occupational income: wage+business-unemployed benefits"
label var gross_inc      "total taxable income"
label var tot_income     "Total income: occupational income + capital + Taxable transf+ Tax-free transf"
label var net_wealth     "net wealth"

// ---- Transfers and benefits ----
label var paid_schol     "study grant"
label var unemp_ben      "unemployment benefit"
label var hous_allo      "Housing allowance"
label var social_contr   "Social contributions paid by the local social service during the year"
label var kid_allow      "Child benefit recognized as income of the mother"
label var retir_pens     "Retirement age pension from the National Insurance Scheme"
label var disab_ins      "Disability pension from the National Insurance Scheme"
label var social_sec_ben "Benefits from the National Insurance Retirement, disability,c etc"
label var taxable_transf "taxable transfer from national insurance: ret pens, unempl. ben."
label var tax_ex_transf  "tax exempt transf: house allo, study grants"
// Identifiers and after-tax income.
label variable id "personal id"
label variable famid "family id"
// The labels for headfam and inc_aft_tax must be separate commands: written on one
// physical line, the second command is not parsed as a command of its own.
label variable headfam "head of the family, id=famid"
label variable inc_aft_tax "income aftertax: Total income - Total assessed taxes and neg transfers"
label variable inc_aft_tax_homepo "Income after taxes - Interest expenses and housing revenues"
// ---- Personal income components ----
label var pi_pension_trygd   "personal income from pensions "
label var pi_wage            "personal income from wages and salaries"
label var pi_other_naering   "personal imputed income from business income, not primary sector"
label var pi_primary_naering "personal imputed income from business income,  primary sector"

// ---- Wages, business and capital income ----
label var wage               "wages and salaries"
label var net_naering        "net business income"
label var capital_income     "capital gross income: interest income + Dividends + Capital gain + Other capital income"

// ---- Ongoing education ----
// The code A (outside the educational system) was blanked out before the conversion to
// numeric, so only the numeric levels need a value label.
label var kode_ongoing_educ  "code ongoing education"
label define ongoing ///
	3 "High school, primary education" ///
	4 "High School, final year" ///
	5 " Intermediate level" ///
	6 "tertiary education , lower level" ///
	7 "university and college education, higher level" ///
	8 "Researcher" ///
	9 "Not specified education", modify
label values kode_ongoing_educ ongoing

// Completed level of education: variable label and value labels.
// With the modify option the definition below adds to or overwrites any existing educatlevl entries.
label var lev_educ "level completed education, 1st digit educ_"
label define educatlevl ///
	0 "no education" ///
	1 "primary school" ///
	2 "lower secondary school" ///
	3 "High school, primary education" ///
	4 "High School, final year" ///
	5 " Extension on secondary education" ///
	6 "tertiary education , lower level" ///
	7 "university and college education, higher level" ///
	8 "Researcher" ///
	9 "Not specified education", modify
label values lev_educ educatlevl
** Still-studying flag: 1 from the first observed year until the year in which the
** completed level reaches the individual's maximum level.
generate stillstudying = 0
// Step 1: flag the first year of every individual with a known level.
bysort id (year): replace stillstudying = 1 if _n == 1 & !missing(lev_educ)
// Step 2: years in which the maximum level is already attained are never flagged.
replace stillstudying = 0 if lev_educ == max_lev_edu & !missing(lev_educ)
// Step 3: carry the flag forward year by year while the level is below the maximum.
// replace works through the observations in order, so a flag set for one year is
// already visible when the next year of the same individual is evaluated.
bysort id (year): replace stillstudying = 1 if stillstudying[_n-1] == 1 ///
	& lev_educ < max_lev_edu & !missing(lev_educ)
replace stillstudying = . if missing(max_lev_edu)
label variable stillstudying "=1 if still studying, lev_edu<max_lev_edu"
// Definitions of self-employed, wage earners and unemployed.
// Negative wages and negative taxable transfers are set to zero first.
replace wage = 0 if wage < 0
replace taxable_transf = 0 if taxable_transf < 0

// Self-employed: non-zero net business income that is larger in absolute value than
// both wages and taxable transfers, and no personal income from primary-sector business.
// NOTE(review): a missing net_naering compares as larger than any number and can
// therefore satisfy these inequalities - confirm the income variables have no missings.
generate self_empl_1 = net_naering != 0 & wage < abs(net_naering) ///
	& pi_primary_naering == 0 & pi_primary_naering <= pi_other_naering ///
	& abs(net_naering) > taxable_transf

/*
Individuals whose personal income from business in other sectors never exceeds (in
absolute value) their personal income from primary-sector business (agriculture,
fishing, mining, forestry, hunting) in any year are not counted as self-employed.
*/
generate other_dominates = pi_other_naering != 0 & abs(pi_other_naering) > abs(pi_primary_naering)
bysort id: egen ever_other_dominates = max(other_dominates)
replace self_empl_1 = 0 if ever_other_dominates == 0
drop other_dominates ever_other_dominates

* Wage earners: not self-employed, wages above both taxable transfers and absolute
* net business income, and no personal income from primary-sector business.
generate wage_earn_1 = self_empl_1 == 0 & wage > taxable_transf ///
	& pi_primary_naering == 0 & wage > abs(net_naering)

* Everyone who is neither self-employed nor a wage earner is classified as unemployed.
generate unemployed_1 = self_empl_1 == 0 & wage_earn_1 == 0
** Drop individuals whose panel has a gap between consecutive observations.
// A gap is a row whose age is not exactly one year below the age in the next row of
// the same individual; the last row of an individual has no successor and is skipped.
bysort id (year): generate noncon = 1 if age != age[_n+1] - 1 & _n < _N
// Diagnostic output: age distribution of the rows that are followed by a gap.
summarize age if noncon == 1
// Spread the flag to all rows of the individual, then drop flagged individuals.
bysort id (year): egen sumnoncon = max(noncon)
drop if sumnoncon == 1
*** Labour market status, main definition only: 0 = UE, 1 = SE, 2 = WE
generate lm_status_1 = 2 * wage_earn_1 + self_empl_1
label define lm_status 0 "UE" 1 "SE" 2 "WE"
label values lm_status_* lm_status

// Labels of the three status indicators and of the education code.
label var self_empl_1  "1 if self_employment & |business income|>wage & |business income|>ub "
label var wage_earn_1  "1 if self_employment_1=0 & wage>ub "
label var unemployed_1 "1 if self_employment_1=0 & wage_earn_1=0"
label var bu_nus2000   " education, 4 digit "
* 50% random sample of individuals; all rows of a sampled individual are kept
sort id
capture drop _merge
preserve
	// Only the identifier is needed to draw the sample: one row per individual,
	// then half of the individuals with a fixed seed so that the draw is reproducible.
	keep id
	bysort id: keep if _n == 1
	set seed 122
	sample 50
	sort id
	tempfile sampled_ids
	save `sampled_ids'
restore
// Keep the rows of the sampled individuals only; no _merge variable is left behind.
merge m:1 id using `sampled_ids', keep(match) nogenerate
// Helper variables of the earlier restrictions are no longer needed.
drop sumnoncon noncon sum_attached attached

// Write the final dataset (overwriting an existing file) and close the log.
// NOTE(review): the file name says "10sample" although the sampling step draws 50% -
// confirm the intended name.
save "${dataoutpath}stock_survival_10sample_all_rev1.dta", replace
log close 
